library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the project data set; the first row of the CSV supplies column names.
df <- readr::read_csv(
  file = "cs_1675_fall2021_finalproject.csv",
  col_names = TRUE
)
## Rows: 1252 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): m
## dbl (10): x1, x2, x3, x4, v1, v2, v3, v4, v5, output
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact column-wise preview: one line per column with its type and
# leading values.
glimpse(df)
## Rows: 1,252
## Columns: 11
## $ x1 <dbl> 0.025878, 0.030768, 0.019325, 0.306212, 0.031296, 0.031073, 0.0~
## $ x2 <dbl> 0.255934, 0.261575, 0.020877, 0.033379, 0.259342, 0.027119, 0.0~
## $ x3 <dbl> 0.492830, 0.498460, 0.258360, 0.255385, 0.264387, 0.260915, 0.0~
## $ x4 <dbl> 0.012770, 0.055779, 0.012424, 0.056190, 0.056594, 0.055192, 0.0~
## $ v1 <dbl> 0.275651, 0.343204, 4.998508, 5.090153, 5.031107, 9.977407, 0.2~
## $ v2 <dbl> 0.033657, 0.027082, 0.030259, 0.052342, 0.517705, 0.532436, 1.0~
## $ v3 <dbl> 1.166214, 1.260579, 1.298285, 1.322005, 1.368195, 1.298797, 1.1~
## $ v4 <dbl> 0.408402, 0.664248, 0.412870, 0.652111, 0.533701, 0.857509, 0.6~
## $ v5 <dbl> 0.525226, 2.866343, 0.409007, 0.861594, 6.451933, 0.958574, 0.2~
## $ m <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"~
## $ output <dbl> 0.786, 0.730, 0.996, 0.326, 0.735, 0.954, 0.969, 0.986, 0.874, ~
# Per-column summary (quartiles and mean for numeric columns, length and
# class for the character column).
summary(df)
## x1 x2 x3 x4
## Min. :0.003117 Min. :0.001173 Min. :0.003344 Min. :0.001447
## 1st Qu.:0.144825 1st Qu.:0.059488 1st Qu.:0.180333 1st Qu.:0.034732
## Median :0.278602 Median :0.170552 Median :0.263551 Median :0.055822
## Mean :0.265192 Mean :0.159282 Mean :0.262674 Mean :0.053189
## 3rd Qu.:0.352812 3rd Qu.:0.238338 3rd Qu.:0.343556 3rd Qu.:0.072108
## Max. :0.609092 Max. :0.446306 Max. :0.509710 Max. :0.101868
## v1 v2 v3 v4
## Min. : 0.003474 Min. :0.002281 Min. : 1.003 Min. :0.01867
## 1st Qu.: 3.335833 1st Qu.:0.334104 1st Qu.: 3.945 1st Qu.:0.30184
## Median : 5.137150 Median :0.515154 Median : 5.632 Median :0.48923
## Mean : 5.079560 Mean :0.503186 Mean : 5.569 Mean :0.49132
## 3rd Qu.: 6.850576 3rd Qu.:0.684780 3rd Qu.: 7.189 3rd Qu.:0.67959
## Max. :10.133807 Max. :1.018897 Max. :10.177 Max. :0.97913
## v5 m output
## Min. : 0.006831 Length:1252 Min. :0.0070
## 1st Qu.: 2.439787 Class :character 1st Qu.:0.2517
## Median : 6.496589 Mode :character Median :0.4835
## Mean : 5.867330 Mean :0.5311
## 3rd Qu.: 9.328919 3rd Qu.:0.8430
## Max. : 9.999845 Max. :0.9990
# One-row-per-column overview of the numeric variables. The character
# column `m` is dropped first so min / median / max apply to every column.
ddf <- select(df, -m)

stats <- tibble::tibble(
  variable = names(ddf),
  num_missing = map_dbl(ddf, function(col) sum(is.na(col))),
  num_unique = map_dbl(ddf, function(col) n_distinct(col)),
  min_value = map_dbl(ddf, function(col) min(col)),
  median_value = map_dbl(ddf, function(col) median(col)),
  max_value = map_dbl(ddf, function(col) max(col))
)

# Render the overview as a captioned table.
knitr::kable(stats, caption = "variables overview")
| variable | num_missing | num_unique | min_value | median_value | max_value |
|---|---|---|---|---|---|
| x1 | 0 | 1245 | 0.003117 | 0.2786015 | 0.609092 |
| x2 | 0 | 1250 | 0.001173 | 0.1705525 | 0.446306 |
| x3 | 0 | 1250 | 0.003344 | 0.2635515 | 0.509710 |
| x4 | 0 | 1235 | 0.001447 | 0.0558215 | 0.101868 |
| v1 | 0 | 1252 | 0.003474 | 5.1371505 | 10.133807 |
| v2 | 0 | 1249 | 0.002281 | 0.5151535 | 1.018897 |
| v3 | 0 | 1252 | 1.002923 | 5.6319045 | 10.176830 |
| v4 | 0 | 1252 | 0.018665 | 0.4892350 | 0.979126 |
| v5 | 0 | 1252 | 0.006831 | 6.4965890 | 9.999845 |
| output | 0 | 690 | 0.007000 | 0.4835000 | 0.999000 |
# Dimensions of the de-duplicated predictor table (every column except
# `output`); a row count equal to nrow(df) means no predictor row repeats.
dim(distinct(select(df, -output)))
## [1] 1252 10
# Derived features used by the plots below (and by later sections):
#   x5 = 1 - (x1 + x2 + x3 + x4)
#   w  = x2 / (x3 + x4)
#   z  = (x1 + x2) / (x4 + x5)
#   t  = v1 * v2
dfT <- df %>%
  tibble::as_tibble() %>%
  mutate(
    x5 = 1 - (x1 + x2 + x3 + x4),
    w = x2 / (x3 + x4),
    z = (x1 + x2) / (x4 + x5),
    t = v1 * v2
  )

# Scatter `output` against every predictor, one plot per variable, each with
# a linear trend line. `dfT` holds all original columns plus the derived
# ones, so a single loop covers both. Plots are printed explicitly because
# ggplot objects built inside a function are not auto-printed.
# `formula = y ~ x` is the formula geom_smooth() already picked; stating it
# avoids the repeated "using formula 'y ~ x'" message.
purrr::walk(
  c("x1", "x2", "x3", "x4", "x5", "w", "z", "v1", "v2", "v3", "v4", "v5", "t"),
  function(var_name) {
    print(
      ggplot(dfT, mapping = aes(x = .data[[var_name]], y = output)) +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x) +
        labs(x = var_name)
    )
  }
)
# Scatter `output` against every predictor with a linear trend line, split
# into one panel per level of `m`. `dfT` holds all original columns plus the
# derived x5 / w / z / t, so one loop replaces the thirteen copy-pasted
# statements. Plots are printed explicitly because ggplot objects built
# inside a function are not auto-printed.
# `formula = y ~ x` is the formula geom_smooth() already picked; stating it
# avoids the repeated "using formula 'y ~ x'" message.
purrr::walk(
  c("x1", "x2", "x3", "x4", "x5", "w", "z", "v1", "v2", "v3", "v4", "v5", "t"),
  function(var_name) {
    print(
      ggplot(dfT, mapping = aes(x = .data[[var_name]], y = output)) +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x) +
        facet_wrap(~m) +
        labs(x = var_name)
    )
  }
)
# Scatter logit(output) against every predictor, one plot per variable, each
# with a linear trend line. `dfT` (built earlier from `df` with the derived
# x5 / w / z / t columns) is reused as-is; recomputing it here with the
# identical expression was redundant. Plots are printed explicitly because
# ggplot objects built inside a function are not auto-printed.
# `formula = y ~ x` is the formula geom_smooth() already picked; stating it
# avoids the repeated "using formula 'y ~ x'" message.
purrr::walk(
  c("x1", "x2", "x3", "x4", "x5", "w", "z", "v1", "v2", "v3", "v4", "v5", "t"),
  function(var_name) {
    print(
      ggplot(
        dfT,
        mapping = aes(x = .data[[var_name]], y = boot::logit(output))
      ) +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x) +
        labs(x = var_name)
    )
  }
)
# Scatter logit(output) against every predictor with a linear trend line,
# split into one panel per level of `m`. `dfT` holds all original columns
# plus the derived x5 / w / z / t, so one loop replaces the thirteen
# copy-pasted statements. Plots are printed explicitly because ggplot
# objects built inside a function are not auto-printed.
# `formula = y ~ x` is the formula geom_smooth() already picked; stating it
# avoids the repeated "using formula 'y ~ x'" message.
purrr::walk(
  c("x1", "x2", "x3", "x4", "x5", "w", "z", "v1", "v2", "v3", "v4", "v5", "t"),
  function(var_name) {
    print(
      ggplot(
        dfT,
        mapping = aes(x = .data[[var_name]], y = boot::logit(output))
      ) +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x) +
        facet_wrap(~m) +
        labs(x = var_name)
    )
  }
)
# Scatter `output` against the product of two predictors, one plot per pair,
# each with a linear trend line. Pairs are listed in the order they are
# plotted.
# NOTE(review): `x5 * w` is plotted but `x1 * w` is not, unlike the z group
# which covers x1..x5 — confirm the first pair was not meant to be x1 / w.
interaction_pairs <- tibble::tribble(
  ~lhs, ~rhs,
  # x* with w
  "x5", "w",
  "x2", "w",
  "x3", "w",
  "x4", "w",
  # x* with z
  "x1", "z",
  "x2", "z",
  "x3", "z",
  "x4", "z",
  "x5", "z",
  # x1..x4 with x5
  "x1", "x5",
  "x2", "x5",
  "x3", "x5",
  "x4", "x5",
  # pairs among x1..x4
  "x2", "x1",
  "x3", "x1",
  "x4", "x1",
  "x3", "x2",
  "x4", "x2",
  "x4", "x3",
  # v* with t
  "v1", "t",
  "v2", "t",
  "v3", "t",
  "v4", "t",
  "v5", "t",
  # v1..v4 with v5
  "v1", "v5",
  "v2", "v5",
  "v3", "v5",
  "v4", "v5",
  # pairs among v1..v4
  "v2", "v1",
  "v3", "v1",
  "v4", "v1",
  "v3", "v2",
  "v4", "v2",
  "v4", "v3"
)

# Plots are printed explicitly because ggplot objects built inside a function
# are not auto-printed. The x-axis title is set to the text of the product
# expression, e.g. "(x5 * w)". `formula = y ~ x` is the formula
# geom_smooth() already picked; stating it avoids the repeated message.
purrr::walk2(
  interaction_pairs$lhs,
  interaction_pairs$rhs,
  function(lhs, rhs) {
    print(
      ggplot(
        dfT,
        mapping = aes(x = .data[[lhs]] * .data[[rhs]], y = output)
      ) +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x) +
        labs(x = sprintf("(%s * %s)", lhs, rhs))
    )
  }
)